* This .do file creates the sample for the 2001-2005 student cohorts who accumulated at least 1.6 EFTS in 2002-2006. We follow procedures similar to those in basesample.do.
*Clean .dta downloaded by RStudio

* Build demo08.dta: one record per student (snz_uid) and course year holding the
* modal provider, NZSCED code and first tertiary year, plus the maximum of each
* demographic code observed in that student-year.
use "I:\MAA2015-20 Credit and Education\data\course00_08.dta", clear

* Within-student-year mode of each source variable, stored under a short name.
local modal_src moe_crs_provider_code moe_crs_qual_nzsced_code moe_crs_first_tertiary_year_nbr
local modal_new provider nzsced firstyr
forvalues k = 1/3 {
    local src : word `k' of `modal_src'
    local new : word `k' of `modal_new'
    egen `new' = mode(`src'), by(snz_uid moe_crs_year_nbr)
}

* The modal variables are constant within a group, so their mean is that same value.
collapse (mean) provider nzsced firstyr ///
         (max)  moe_crs_birth_year_nbr moe_crs_last_school_decile_code ///
                moe_crs_disability_code moe_crs_sex_snz_code moe_crs_ethnic1_snz_code, ///
         by(snz_uid moe_crs_year_nbr)

* Common merge key used by the later stages.
generate year = moe_crs_year_nbr
compress
save "demo08.dta", replace

* Build ba_year_all.dta: for every student, the earliest year with a completed
* qualification whose moe_com_qacc_code is at most 20, keeping the record with the
* lowest qacc code in that year.
use "I:\MAA2015-20 Credit and Education\data\complete.dta", clear
generate year = moe_com_year_nbr
keep snz_uid snz_moe_uid year moe_com_qual_code moe_com_provider_code ///
     moe_com_qual_nzsced_code moe_com_qacc_code

* Restrict to completions with qacc code <= 20 (missing codes fail the test and are dropped).
keep if moe_com_qacc_code <= 20

* Within each student-year keep only the lowest qacc code, then a single row.
sort snz_uid year moe_com_qacc_code
egen best_qacc = min(moe_com_qacc_code), by(snz_uid year)
keep if best_qacc == moe_com_qacc_code
drop best_qacc
duplicates drop snz_uid year, force

* Keep the earliest completion year per student.
egen first_year = min(year), by(snz_uid)
keep if first_year == year
drop first_year

generate ba_year = year
rename (moe_com_provider_code moe_com_qual_code moe_com_qual_nzsced_code moe_com_qacc_code) ///
       (ba_provider           ba_qual           ba_nzsced                ba_qacc)
compress
save "ba_year_all.dta", replace

* Build loan08.dta: one loan record per student and year for years before 2009.
use "I:\MAA2015-20 Credit and Education\data\borrow00_17.dta", clear

* Total amount drawn in the year. The sum is missing whenever any component is missing.
gen loan_amt = msd_sla_ann_drawn_fee_amt + msd_sla_ann_drawn_living_cst_mt + msd_sla_ann_admin_fee_amt + msd_sla_ann_drawn_course_rel_mt

* First year with a drawn amount of at least 100. Stata treats missing as larger
* than any number, so the upper bound is needed to stop rows with a missing
* loan_amt from passing the >=100 test. This is the same guard the later loaner
* and loan indicators apply. minyear is computed on all years, before the
* pre-2009 restriction below.
egen minyear = min(msd_sla_year_nbr) if loan_amt >= 100 & loan_amt < ., by(snz_uid)

drop if msd_sla_year_nbr >= 2009
gen year = msd_sla_year_nbr
keep minyear snz_uid snz_ird_uid year loan_amt msd_sla_balance_amt msd_sla_sa_study_start_date msd_sla_sa_study_end_date msd_sla_sl_study_status_code msd_sla_ann_allowance_paid_amt msd_sla_ann_accommodation_amt

* When a student has several rows in a year, the row sorted first (smallest loan_amt) is kept.
sort snz_uid year loan_amt
duplicates drop snz_uid year, force
compress
save "loan08.dta", replace

*We use courses taken up to 2008 as performance assessment counts for courses taken in 2009 and after.
* Build course_endyear_08.dta: course-level records for students who ever took a
* course flagged ba_course (qacc code 12 or 20), with a completion indicator.
use "I:\MAA2015-20 Credit and Education\data\course00_08.dta", clear

generate ba_course = inlist(moe_crs_qacc_code, 12, 20)
egen ba_enrol = max(ba_course), by(snz_uid)
keep if ba_enrol == 1

generate end_year   = year(moe_crs_end_date)
generate start_year = year(moe_crs_start_date)
* cross marks courses whose start and end fall in different calendar years.
generate cross      = (end_year != start_year)

* One row per student, end date, course, provider and qualification.
collapse (max) moe_crs_complete_code moe_crs_qual_type_code ba_course ///
               moe_crs_efts_course_factor_nbr moe_crs_first_tertiary_year_nbr ///
               end_year start_year moe_crs_qacc_code moe_crs_start_date ///
               moe_crs_withdrawal_date cross moe_crs_study_type_code, ///
         by(snz_uid snz_moe_uid moe_crs_end_date moe_crs_course_code ///
            moe_crs_provider_code moe_crs_qual_code)

* Completion codes 2, 3 and 4 count as completed, unless the withdrawal date is
* within 30 days of the start date.
generate completion = inlist(moe_crs_complete_code, 2, 3, 4)
replace  completion = 0 if (moe_crs_withdrawal_date - moe_crs_start_date) <= 30
compress
save "course_endyear_08.dta", replace

* Build efts_startyear08.dta: EFTS attempted and passed per student and start year,
* restricted to records with qualification type code 3.
use "course_endyear_08.dta", clear
keep if moe_crs_qual_type_code == 3

* Modal first tertiary year per student, and modal provider per student and start year.
* The provider mode is taken before start_year is recoded below.
egen firstyr  = mode(moe_crs_first_tertiary_year_nbr), by(snz_uid)
egen provider = mode(moe_crs_provider_code), by(start_year snz_uid)

* Courses that began before 2000 but ended in 2000 or later are assigned to 2000.
replace start_year = 2000 if start_year < 2000 & end_year >= 2000

* pass is defined for completed courses only; complete code 2 counts as a pass.
generate pass = (moe_crs_complete_code == 2) if completion == 1
generate efts = completion * moe_crs_efts_course_factor_nbr
generate efts_pass_startyear = pass * efts if completion == 1

collapse (max) provider ba_course firstyr ///
         (sum) efts efts_pass_startyear moe_crs_efts_course_factor_nbr ///
         (min) moe_crs_study_type_code, ///
         by(snz_uid snz_moe_uid start_year)

generate year = start_year
sort snz_uid year
compress
save "efts_startyear08.dta", replace
* Build bal08.dta: a balanced student-by-year panel covering 2000 to 2011, starting
* from the student list currently in memory. tertiary_year runs from 0 to 11.
collapse (mean) snz_moe_uid, by(snz_uid)
compress

* One wide column per calendar year, then stack them into long form.
forvalues offset = 0/11 {
    generate year`offset' = 2000 + `offset'
}
reshape long year, i(snz_uid snz_moe_uid) j(tertiary_year)
compress
save "bal08.dta", replace
* Build sample08.dta: attach loans and EFTS to the balanced panel, find the year in
* which accumulated EFTS first reaches 1.6 (ind equal to 1), and define enrolment and
* borrowing indicators for that year and the following one.
use "bal08.dta", clear
merge m:1 snz_uid year using "loan08.dta", keep(master match) nogenerate
*egen maxloan=max(loan_amt),by(snz_uid)
*drop if maxloan<100|maxloan==.
merge m:1 snz_uid year using "efts_startyear08.dta", keep(master match) nogenerate
drop msd_sla_balance_amt start_year

* Running totals within student, in year order. sum() treats missing as zero.
sort snz_uid year
by snz_uid: generate efts_acc      = sum(efts)
by snz_uid: egen     ind           = seq() if efts_acc >= 1.6
by snz_uid: generate efts_pass_acc = sum(efts_pass_startyear)
generate ratio_pass = efts_pass_acc / efts_acc

* Keep the years from the threshold year onward. Rows with ind equal to 1 in 2009 or
* later are removed.
keep if ind >= 1 & ind < .
drop if year >= 2009 & ind == 1

* Within student, row 1 is the first remaining row and row 2 is the next one.
* All indicators are stored on the ind equal to 1 row only.
sort snz_uid ind
by snz_uid: generate enrolba = (efts[1] < . & efts[1] >= 0.1 & ba_course == 1)         if ind == 1
by snz_uid: generate loaner  = (year[1] != . & loan_amt[1] >= 100 & loan_amt[1] < .)  if ind == 1
by snz_uid: replace  loaner  = 0 if (efts[1] == . | efts[1] < 0.1) & ind == 1
by snz_uid: generate loan    = (year[2] != . & loan_amt[2] >= 100 & loan_amt[2] < .)  if ind == 1
by snz_uid: replace  loan    = 0 if (efts[2] == . | efts[2] < 0.1) & ind == 1
by snz_uid: generate enrol   = (efts[2] < . & efts[2] >= 0.1)                          if ind == 1

* Earliest remaining year per student, and its distance from 2011.
egen indyear = min(year), by(snz_uid)
generate indyr = 2011 - indyear

compress
save "sample08.dta", replace
*
* Attach demographics, derive covariates, and keep only the observations used by the
* enrolment regression (ind equal to 1 and loaner equal to 1, all covariates non-missing).
use "sample08.dta", clear
merge m:1 snz_uid year using "demo08.dta", keep(master match) nogenerate

* Indicator covariates; each is left missing when its source code is missing.
generate age       = year - moe_crs_birth_year_nbr                 if moe_crs_birth_year_nbr != .
generate white     = (moe_crs_ethnic1_snz_code <= 129)             if moe_crs_ethnic1_snz_code != .
generate maori     = inrange(moe_crs_ethnic1_snz_code, 211, 371)   if moe_crs_ethnic1_snz_code != .
generate otherrace = inrange(moe_crs_ethnic1_snz_code, 500, 611)   if moe_crs_ethnic1_snz_code != .
generate disable   = (moe_crs_disability_code == 2)                if moe_crs_disability_code != .
generate male      = (moe_crs_sex_snz_code == 1)                   if moe_crs_sex_snz_code != .
generate female    = 1 - male
generate fullyr    = (moe_crs_study_type_code == 1)                if moe_crs_study_type_code != .
generate uni       = inrange(provider, 7001, 7008)                 if provider != .

* Leading digits of the NZSCED code, then two field-of-study indicators.
generate sced     = int(nzsced / 10000)
generate science  = inrange(sced, 1, 3) if sced != .
generate business = (sced == 8)         if sced != .
rename moe_crs_last_school_decile_code decile

* Treatment is a pass ratio of at least 0.5; ratio is the distance from that cutoff.
generate treat       = (ratio_pass >= 0.5)
generate ratio       = ratio_pass - 0.5
generate treat_ratio = treat * ratio

set matsize 10000
regress enrol treat ratio_pass male age maori disable decile ///
        i.moe_crs_study_type_code business science i.provider i.firstyr i.year ///
        if ind == 1 & loaner == 1

* Restrict the data to the estimation sample of the regression above.
keep if e(sample)
* Indicator variables for cohort, provider and year. Each xi call is followed
* immediately by a rename so the next xi call does not replace its output.
generate cohort = firstyr if firstyr >= 2000
xi i.cohort
rename _Icohort_# cohort#

* Providers outside 7001-7008 (including missing) are pooled under code 7000.
generate provider1 = cond(inrange(provider, 7001, 7008), provider, 7000)
xi i.provider1
rename _Iprovider1_# provider#
drop provider1 provider7006

xi i.year
rename _Iyear_# year#
* Attach the first BA completion year and derive completion-timing outcomes.
* ba_year_all.dta holds one row per snz_uid, so this is a many-to-one merge. m:1
* returns the same result as the former m:m whenever that holds, and stops with an
* error instead of silently pairing rows by position if the key is ever duplicated.
merge m:1 snz_uid using "ba_year_all.dta"
drop if _merge==2
drop _merge

* Years between the sample year and BA completion. A missing ba_year gives a missing
* indba, for which every comparison below is false and the indicator is 0.
gen indba=ba_year-year
forvalues n = 1/7 {
    gen ba_post`n'yr=(indba<=`n')
}

* Years from cohort entry to completion (first year counts as 1). Set to missing when
* completion precedes the sample year, and bottom-coded at 3.
gen  yr= ba_year - cohort+1
replace yr=. if ba_year<year
replace yr=3 if yr<=3
forvalues n = 4/8 {
    gen ba_`n'yr=(yr<=`n')
}

* Redefine tertiary_year relative to the first tertiary year.
replace tertiary_year =year-firstyr +1

* Remove working variables, apply the final sample restrictions and save basesample08.dta.
drop msd_sla_ann_allowance_paid_amt-msd_sla_sl_study_status_code ///
     moe_crs_birth_year_nbr moe_crs_disability_code moe_crs_sex_snz_code ///
     ind yr efts efts_pass_startyear moe_crs_efts_course_factor_nbr ///
     moe_crs_study_type_code moe_crs_year_nbr nzsced moe_crs_ethnic1_snz_code ///
     indyear ba_provider-ba_qacc ba_course indba ///
     cohort2006-cohort2008 year2001 year2007-year2008

* Enrolled per enrolba in the threshold year, and no BA completed by that year.
* A missing ba_year compares as larger than year and is therefore kept.
keep if enrolba == 1 & ba_year > year

*Course data in 2000 was not complete so we use cohort starting in 2001.
keep if inrange(cohort, 2001, 2005) & age < 24
keep if inrange(year, 2002, 2006)
keep if efts_pass_acc < 3

compress
save "basesample08.dta", replace

* Save the list of distinct student identifiers in the final sample.
use "basesample08.dta", clear
keep snz_uid
duplicates drop
compress
save "uid_final08.dta", replace

* For each IRD income file from 2002 to 2011, count months with wage-and-salary (WAS)
* income, count months above a year-specific monthly threshold, compute average WAS
* per paid month and its percentile rank within sex and birth-year cells, and save
* the rows belonging to the final sample as avg_inc20YY_08.dta.
*
* The thresholds are listed in year order, 2002 first and 2011 last.
* NOTE(review): the variable naming suggests these are monthly minimum-wage
* equivalents; the values are carried over unchanged and should be confirmed.
local minwage_cut 1280 1360 1440 1520 1640 1800 1920 2000 2040 2080

forvalues i = 2/11 {
    * Two-digit year suffix (02 to 11) and the threshold for that year.
    local yy : display %02.0f `i'
    local cut : word `=`i' - 1' of `minwage_cut'

    use "I:\MAA2015-20 Credit and Education\data\IRD_inc_`yy'.dta", clear

    * Monthly indicators: any WAS income, and WAS income above the threshold.
    foreach j in 01 02 03 04 05 06 07 08 09 10 11 12 {
        gen du_was_`j'=(inc_cal_yr_sum_WAS_mth_`j'_amt>0&inc_cal_yr_sum_WAS_mth_`j'_amt<.)
        gen du_minwage_`j'=(inc_cal_yr_sum_WAS_mth_`j'_amt>`cut'&inc_cal_yr_sum_WAS_mth_`j'_amt<.)
    }

    gen sum_du_was=du_was_01+du_was_02+du_was_03+du_was_04+du_was_05+du_was_06+du_was_07+du_was_08+du_was_09+du_was_10+du_was_11+du_was_12
    gen sum_du_minwage=du_minwage_01+du_minwage_02+du_minwage_03+du_minwage_04+du_minwage_05+du_minwage_06+du_minwage_07+du_minwage_08+du_minwage_09+du_minwage_10+du_minwage_11+du_minwage_12

    * Average over paid months; missing when there is no paid month.
    gen avg_was=inc_cal_yr_sum_WAS_tot_amt/ sum_du_was
    keep inc_cal_yr_sum_year_nbr snz_uid sum_* avg_*

    * NOTE(review): m:m is kept here because the uniqueness of snz_uid in demo_all.dta
    * and in the IRD file cannot be seen from this script. If either side is unique,
    * switch to 1:m or m:1 so a duplicated key raises an error.
    merge m:m snz_uid using "I:\MAA2015-20 Credit and Education\data\demo_all.dta"
    keep if _merge==3
    drop _merge

    * Percentile of average WAS within sex and birth-year cells, positive earners only.
    * Zero or missing average WAS is assigned percentile 0.
    bysort snz_sex_code snz_birth_year_nbr:egen rank= rank(avg_was) if avg_was>0&avg_was<.,unique
    bysort snz_sex_code snz_birth_year_nbr:egen maxrank= max(rank) if avg_was>0&avg_was<.
    gen percentile= rank/ maxrank if avg_was>0&avg_was<.
    replace percentile=percentile*100 if avg_was>0&avg_was<.
    gen percent=ceil(percentile) if avg_was>0&avg_was<.
    replace percent=0 if avg_was==0
    replace percent=0 if avg_was==.

    * uid_final08.dta has one row per snz_uid, so this is a many-to-one merge.
    * _merge is deliberately left in the saved file; the append step drops it.
    merge m:1 snz_uid using "uid_final08.dta"
    keep if _merge==3
    compress
    save "avg_inc20`yy'_08.dta",replace
}

* Stack the yearly income files for 2002 to 2011 into avg_inc_08.dta.
use "avg_inc2002_08.dta", clear
forvalues y = 2003/2011 {
    append using avg_inc`y'_08
}
drop _merge
compress
save "avg_inc_08.dta", replace

* Reshape the stacked income data to one row per student with one column per
* measure and calendar year.
use "avg_inc_08.dta", clear
rename percent was_percent

local measures sum_du_was sum_du_minwage was_percent avg_was
keep `measures' snz_uid inc_cal_yr_sum_year_nbr
reshape wide `measures', i(snz_uid) j(inc_cal_yr_sum_year_nbr)

compress
save "avg_inc_w_08.dta", replace


* Attach the wide income measures to the base sample and convert them from calendar
* years to years elapsed since the sample year (post0yr to post9yr).
use "basesample08",clear

* avg_inc_w_08.dta comes from a reshape wide keyed on snz_uid, so it has one row per
* student. m:1 gives the same result as the former m:m and raises an error if that
* ever stops being true.
merge m:1 snz_uid using "avg_inc_w_08.dta"
drop if _merge==2
drop _merge

local measures avg_was sum_du_was sum_du_minwage was_percent

* Students with no income record in a year get zeros for that year.
* wide_vars collects the calendar-year columns so they can be removed by name later.
local wide_vars
forvalues y = 2002/2011 {
    foreach m of local measures {
        replace `m'`y' = 0 if `m'`y' == .
        local wide_vars `wide_vars' `m'`y'
    }
}

* Outcome columns start missing and stay missing when the target year is after 2011.
forvalues h = 0/9 {
    foreach m of local measures {
        gen `m'_post`h'yr = .
    }
}

forvalues h = 0/9 {
    forvalues base = 2002/2006 {
        local target = `base' + `h'
        if `target' <= 2011 {
            foreach m of local measures {
                replace `m'_post`h'yr = `m'`target' if year == `base'
            }
        }
    }
}

* Drop the calendar-year columns by name. The former positional range from
* sum_du_was2002 to was_percent2011 relied on column order, and with the order that
* reshape wide produces (by year, then by measure) it would leave avg_was2011 behind.
drop `wide_vars'
compress
save "basesample_earn_08.dta",replace

 
 
